In [ ]:
#read all csv file in diffrent dataframes 
import pandas as pd
import os

p23 = pd.read_csv('iprecipitation_2023.csv')
p22 = pd.read_csv('iprecipitation_2022.csv')
p21 = pd.read_csv('iprecipitation_2021.csv')
p20 = pd.read_csv('iprecipitation_2020.csv')
p19 = pd.read_csv('iprecipitation_2019.csv')
p18 = pd.read_csv('iprecipitation_2018.csv')
p17 = pd.read_csv('iprecipitation_2017.csv')
p16 = pd.read_csv('iprecipitation_2016.csv')
p15 = pd.read_csv('iprecipitation_2015.csv')
p14 = pd.read_csv('iprecipitation_2014.csv')
p13 = pd.read_csv('iprecipitation_2013.csv')
p12 = pd.read_csv('iprecipitation_2012.csv')
p11 = pd.read_csv('iprecipitation_2011.csv')
p10 = pd.read_csv('iprecipitation_2010.csv')
p9 = pd.read_csv('iprecipitation_2009.csv')
p8 = pd.read_csv('iprecipitation_2008.csv')
p7 = pd.read_csv('iprecipitation_2007.csv')
p6 = pd.read_csv('iprecipitation_2006.csv')
p5 = pd.read_csv('iprecipitation_2005.csv')
p4 = pd.read_csv('iprecipitation_2004.csv')
p3 = pd.read_csv('iprecipitation_2003.csv')
In [ ]:
mon = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec','lat','lon']

p3.columns = mon
p4.columns = mon
p5.columns = mon
p6.columns = mon
p7.columns = mon
p8.columns = mon
p9.columns = mon
p10.columns = mon
p11.columns = mon
p12.columns = mon
p13.columns = mon
p14.columns = mon
p15.columns = mon
p16.columns = mon
p17.columns = mon
p18.columns = mon
p19.columns = mon
p20.columns = mon
p21.columns = mon
p22.columns = mon
p23.columns = mon
In [ ]:
import matplotlib.pyplot as plt
monthly_averages2023 = p23.mean()
monthly_averages2023 = monthly_averages2023.drop(['lat','lon'])
monthly_averages2022 = p22.mean()
monthly_averages2022 = monthly_averages2022.drop(['lat','lon'])
monthly_averages2021 = p21.mean()
monthly_averages2021 = monthly_averages2021.drop(['lat','lon'])
monthly_averages2020 = p20.mean()
monthly_averages2020 = monthly_averages2020.drop(['lat','lon'])
monthly_averages2019 = p19.mean()
monthly_averages2019 = monthly_averages2019.drop(['lat','lon'])
monthly_averages2018 = p18.mean()
monthly_averages2018 = monthly_averages2018.drop(['lat','lon'])
monthly_averages2017 = p17.mean()
monthly_averages2017 = monthly_averages2017.drop(['lat','lon'])
monthly_averages2016 = p16.mean()
monthly_averages2016 = monthly_averages2016.drop(['lat','lon'])
monthly_averages2015 = p15.mean()
monthly_averages2015 = monthly_averages2015.drop(['lat','lon'])
monthly_averages2014 = p14.mean()
monthly_averages2014 = monthly_averages2014.drop(['lat','lon'])
monthly_averages2013 = p13.mean()
monthly_averages2013 = monthly_averages2013.drop(['lat','lon'])
monthly_averages2012 = p12.mean()
monthly_averages2012 = monthly_averages2012.drop(['lat','lon'])
monthly_averages2011 = p11.mean()
monthly_averages2011 = monthly_averages2011.drop(['lat','lon'])
monthly_averages2010 = p10.mean()
monthly_averages2010 = monthly_averages2010.drop(['lat','lon'])
monthly_averages2009 = p9.mean()
monthly_averages2009 = monthly_averages2009.drop(['lat','lon'])
monthly_averages2008 = p8.mean()
monthly_averages2008 = monthly_averages2008.drop(['lat','lon'])
monthly_averages2007 = p7.mean()
monthly_averages2007 = monthly_averages2007.drop(['lat','lon'])
monthly_averages2006 = p6.mean()
monthly_averages2006 = monthly_averages2006.drop(['lat','lon'])
monthly_averages2005 = p5.mean()
monthly_averages2005 = monthly_averages2005.drop(['lat','lon'])
monthly_averages2004 = p4.mean()
monthly_averages2004 = monthly_averages2004.drop(['lat','lon'])
monthly_averages2003 = p3.mean()
monthly_averages2003 = monthly_averages2003.drop(['lat','lon'])

#plot the data of each month value in graph point 
monthly_averages2023.plot(kind='line')
plt.show()
monthly_averages2022.plot(kind='line')
plt.show()
monthly_averages2021.plot(kind='line')
plt.show()
In [ ]:
maindf = pd.concat([monthly_averages2003, monthly_averages2004, monthly_averages2005, monthly_averages2006, monthly_averages2007, monthly_averages2008, monthly_averages2009, monthly_averages2010, monthly_averages2011, monthly_averages2012, monthly_averages2013, monthly_averages2014, monthly_averages2015, monthly_averages2016, monthly_averages2017, monthly_averages2018, monthly_averages2019, monthly_averages2020, monthly_averages2021, monthly_averages2022, monthly_averages2023], axis=1)

maindf.columns = range(2003,2024)
In [ ]:
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
ls = [maindf.loc[month].mean() for month in months]
maindf['avg_of_all'] = ls
#plot the avg of all month in each year
In [ ]:
lsa= maindf['avg_of_all']
lsa.plot(kind='line')
plt.show()
In [ ]:
sam = maindf['avg_of_all']
amamoly = maindf.sub(sam, axis=0)
amamoly1 = maindf
In [ ]:
# i need the analmoly in time series
amamoly.plot()
plt.show()
In [ ]:
data ={
    'index':['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    '2003':amamoly1[2003],
    '2004':amamoly1[2004],
    '2005':amamoly1[2005],
    '2006':amamoly1[2006],
    '2007':amamoly1[2007],
    '2008':amamoly1[2008],
    '2009':amamoly1[2009],
    '2010':amamoly1[2010],
    '2011':amamoly1[2011],
    '2012':amamoly1[2012],
    '2013':amamoly1[2013],
    '2014':amamoly1[2014],
    '2015':amamoly1[2015],
    '2016':amamoly1[2016],
    '2017':amamoly1[2017],
    '2018':amamoly1[2018],
    '2019':amamoly1[2019],
    '2020':amamoly1[2020],
    '2021':amamoly1[2021],
    '2022':amamoly1[2022],
    '2023':amamoly1[2023]
}
df = pd.DataFrame(data)

# Melt the DataFrame to long format
df_long = df.melt(id_vars=['index'], var_name='Year', value_name='Value')

# Convert 'index' to datetime
df_long['Date'] = pd.to_datetime(df_long['Year'] + '-' + df_long['index'], format='%Y-%b')

# Plotting the time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], df_long['Value'], marker='', linestyle='-', color='black')


# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel(' Precipitation mm/hr') # Rotate x-axis labels for better readability

# Show the plot
plt.grid(True)
plt.show()

#write the data to csv file
df_long.to_csv('precipitation_anomalyINdo.csv', index=False)
In [ ]:
data ={
    'index':['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    '2003':amamoly[2003],
    '2004':amamoly[2004],
    '2005':amamoly[2005],
    '2006':amamoly[2006],
    '2007':amamoly[2007],
    '2008':amamoly[2008],
    '2009':amamoly[2009],
    '2010':amamoly[2010],
    '2011':amamoly[2011],
    '2012':amamoly[2012],
    '2013':amamoly[2013],
    '2014':amamoly[2014],
    '2015':amamoly[2015],
    '2016':amamoly[2016],
    '2017':amamoly[2017],
    '2018':amamoly[2018],
    '2019':amamoly[2019],
    '2020':amamoly[2020],
    '2021':amamoly[2021],
    '2022':amamoly[2022],
    '2023':amamoly[2023]
}
df = pd.DataFrame(data)

# Melt the DataFrame to long format
df_long = df.melt(id_vars=['index'], var_name='Year', value_name='Value')

# Convert 'index' to datetime
df_long['Date'] = pd.to_datetime(df_long['Year'] + '-' + df_long['index'], format='%Y-%b')

# Plotting the time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], df_long['Value'], marker='', linestyle='-', color='black')
plt.fill_between(df_long['Date'], df_long['Value'],where= df_long['Value']>0, color='blue', alpha=0.6)
plt.fill_between(df_long['Date'], df_long['Value'],where= df_long['Value']<0, color='red', alpha=0.6)


# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel(' Precipitation mm/hr') # Rotate x-axis labels for better readability

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def moving_average(data, window_size):
    return data.rolling(window=window_size, center=True).mean()

def kz_filter(data, window_size, iterations):
    result = data.copy()
    for _ in range(iterations):
        result = moving_average(result, window_size)
    return result# Apply KZ filter
window_size = 5
iterations = 3
df_longss = df_long.copy()
df_longss['Filtered_Value5'] = kz_filter(df_long['Value'], window_size, iterations)

# Plotting the time series data
plt.figure(figsize=(18, 5))
plt.plot(df_longss['Date'], df_longss['Filtered_Value5'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value5'],where=df_longss['Filtered_Value5']>0, color='red', alpha=0.6)
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value5'],where=df_longss['Filtered_Value5']<0, color='blue', alpha=0.6)

# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with KZ Filter with window size 5 ')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')
plt.legend()

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def moving_average(data, window_size):
    return data.rolling(window=window_size, center=True).mean()

def kz_filter(data, window_size, iterations):
    result = data.copy()
    for _ in range(iterations):
        result = moving_average(result, window_size)
    return result# Apply KZ filter
window_size = 3
iterations = 3
df_longss['Filtered_Value3'] = kz_filter(df_long['Value'], window_size, iterations)

# Plotting the time series data
plt.figure(figsize=(18, 5))
plt.plot(df_longss['Date'], df_longss['Filtered_Value3'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value3'],where=df_longss['Filtered_Value3']>0, color='red', alpha=0.6)
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value3'],where=df_longss['Filtered_Value3']<0, color='blue', alpha=0.6)

# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with KZ Filter with window size 3')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')
plt.legend()

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def moving_average(data, window_size):
    return data.rolling(window=window_size, center=True).mean()

def kz_filter(data, window_size, iterations):
    result = data.copy()
    for _ in range(iterations):
        result = moving_average(result, window_size)
    return result# Apply KZ filter
window_size = 7
iterations = 3
df_longss['Filtered_Value7'] = kz_filter(df_long['Value'], window_size, iterations)

# Plotting the time series data
plt.figure(figsize=(18, 5))
plt.plot(df_longss['Date'], df_longss['Filtered_Value7'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value7'],where=df_longss['Filtered_Value7']>0, color='red', alpha=0.6)
plt.fill_between(df_longss['Date'], df_longss['Filtered_Value7'],where=df_longss['Filtered_Value7']<0, color='blue', alpha=0.6)

# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with KZ Filter with window size 7')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')
plt.legend()

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
from scipy.ndimage import gaussian_filter1d
df_long = df_long.sort_values('Date')

# Apply Gaussian smoothing
sigma = 3  # Standard deviation for Gaussian kernel
smoothed_values1 = gaussian_filter1d(df_long['Value'], sigma=sigma)

# Plotting the original and smoothed time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], smoothed_values1, color='black', label='Smoothed Value')
plt.fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1<0, color='b', alpha=0.6)


# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing  with sigma 3')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr') # Rotate x-axis labels for better readability
plt.legend()

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
from scipy.ndimage import gaussian_filter1d
df_long = df_long.sort_values('Date')

# Apply Gaussian smoothing
sigma = 5  # Standard deviation for Gaussian kernel
smoothed_values2 = gaussian_filter1d(df_long['Value'], sigma=sigma)

# Plotting the original and smoothed time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], smoothed_values2, color='black', label='Smoothed Value')
plt.fill_between(df_long['Date'], smoothed_values2,where=smoothed_values2>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], smoothed_values2,where=smoothed_values2<0, color='b', alpha=0.6)


# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing with sigma 5')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')
 # Rotate x-axis labels for better readability
plt.legend()

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
from scipy.ndimage import gaussian_filter1d
df_long = df_long.sort_values('Date')

# Apply Gaussian smoothing
sigma = 7  # Standard deviation for Gaussian kernel
smoothed_values3 = gaussian_filter1d(df_long['Value'], sigma=sigma)

# Plotting the original and smoothed time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], smoothed_values3, color='black', label='Smoothed Value')
plt.fill_between(df_long['Date'], smoothed_values3,where=smoothed_values3>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], smoothed_values3,where=smoothed_values3<0, color='b', alpha=0.6)


# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing with sigma 7')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr') # Rotate x-axis labels for better readability
plt.legend()

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
from scipy.signal import convolve
def triangle_kernel(size):
    kernel = np.zeros(size)
    half_size = size // 2
    for i in range(half_size + 1):
        kernel[i] = i + 1
        kernel[-(i + 1)] = i + 1
    return kernel / kernel.sum()

kernel_size = 3  # Adjust size as needed
kernel = triangle_kernel(kernel_size)

# Apply triangle smoothing
Tsmoothed_values = convolve(df_long['Value'], kernel, mode='same')

# Plotting the original and smoothed time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], Tsmoothed_values, color='black', label='Smoothed Value')
plt.fill_between(df_long['Date'], Tsmoothed_values, where=Tsmoothed_values>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], Tsmoothed_values, where=Tsmoothed_values<0, color='b', alpha=0.6)

# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing with kernel size 3')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')  # Rotate x-axis labels for better readability
plt.legend()

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
from scipy.signal import convolve
def triangle_kernel(size):
    kernel = np.zeros(size)
    half_size = size // 2
    for i in range(half_size + 1):
        kernel[i] = i + 1
        kernel[-(i + 1)] = i + 1
    return kernel / kernel.sum()

kernel_size = 5  # Adjust size as needed
kernel = triangle_kernel(kernel_size)

# Apply triangle smoothing
Tsmoothed_values5 = convolve(df_long['Value'], kernel, mode='same')

# Plotting the original and smoothed time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], Tsmoothed_values5, color='black', label='Smoothed Value')
plt.fill_between(df_long['Date'], Tsmoothed_values5, where=Tsmoothed_values5>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], Tsmoothed_values5, where=Tsmoothed_values5<0, color='b', alpha=0.6)

# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing with kernel size 5')
plt.xlabel('Year')
plt.ylabel('Presispitation in mm/hr')  # Rotate x-axis labels for better readability
plt.legend()

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
from scipy.signal import convolve
def triangle_kernel(size):
    kernel = np.zeros(size)
    half_size = size // 2
    for i in range(half_size + 1):
        kernel[i] = i + 1
        kernel[-(i + 1)] = i + 1
    return kernel / kernel.sum()

kernel_size = 7  # Adjust size as needed
kernel = triangle_kernel(kernel_size)

# Apply triangle smoothing
Tsmoothed_values7 = convolve(df_long['Value'], kernel, mode='same')

# Plotting the original and smoothed time series data
plt.figure(figsize=(18, 5))
plt.plot(df_long['Date'], Tsmoothed_values7, color='black')
plt.fill_between(df_long['Date'], Tsmoothed_values7, where=Tsmoothed_values7>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], Tsmoothed_values7, where=Tsmoothed_values7<0, color='b', alpha=0.6)

# Adding title and labels
plt.title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing with kernel size 7')
plt.xlabel('Year')
plt.ylabel('precepitation mm/hr') 

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
import pandas as pd

# Assuming amamoly is already defined
pf = amamoly.drop('avg_of_all', axis=1)

# Calculate 3-month moving averages
m1 = (pf.loc['Jan'] + pf.loc['Feb'] + pf.loc['Mar']) / 3
m2 = (pf.loc['Feb'] + pf.loc['Mar'] + pf.loc['Apr']) / 3
m3 = (pf.loc['Mar'] + pf.loc['Apr'] + pf.loc['May']) / 3
m4 = (pf.loc['Apr'] + pf.loc['May'] + pf.loc['Jun']) / 3
m5 = (pf.loc['May'] + pf.loc['Jun'] + pf.loc['Jul']) / 3
m6 = (pf.loc['Jun'] + pf.loc['Jul'] + pf.loc['Aug']) / 3
m7 = (pf.loc['Jul'] + pf.loc['Aug'] + pf.loc['Sep']) / 3
m8 = (pf.loc['Aug'] + pf.loc['Sep'] + pf.loc['Oct']) / 3
m9 = (pf.loc['Sep'] + pf.loc['Oct'] + pf.loc['Nov']) / 3
m10 = (pf.loc['Oct'] + pf.loc['Nov'] + pf.loc['Dec']) / 3

# Concatenate the results into a single DataFrame
df = pd.concat([m1, m2, m3, m4, m5, m6, m7, m8, m9, m10], axis=1)

# Set appropriate column names
df.columns = range(1, 11)

# Add a 'Year' column and set it as the index
df['Year'] = range(2003, 2024)

# Transform the DataFrame from wide to long format
df_melted = df.melt(id_vars=["Year"], var_name="Month", value_name="Value")

# Print the first few rows of the melted DataFrame for verification


# store the data in a variable by taking the values of df_melted by sorting the year and month
a = df_melted.sort_values(by=['Year', 'Month'])
a["Date"] = pd.to_datetime(a['Year'].astype(str) + '-' + a['Month'].astype(str), format='%Y-%m')
In [ ]:
plt.figure(figsize=(18, 5))

# Plot the continuous line
plt.plot(a['Date'], a['Value'], linestyle='-', color='black')
plt.fill_between(a['Date'], a['Value'],where=a['Value']>0, color='r', alpha=0.6)
plt.fill_between(a['Date'], a['Value'],where=a['Value']<0, color='b', alpha=0.6)

# Adding title and labels
plt.title('3-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('precepitation mm/hr')
# Rotate x-axis labels for better readability

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
import pandas as pd

# Assuming amamoly is a DataFrame with appropriate data
pf = amamoly.drop('avg_of_all', axis=1)

# Calculate 5-month moving averages
m1 = (pf.loc['Jan'] + pf.loc['Feb'] + pf.loc['Mar'] + pf.loc['Apr'] + pf.loc['May']) / 5
m2 = (pf.loc['Feb'] + pf.loc['Mar'] + pf.loc['Apr'] + pf.loc['May'] + pf.loc['Jun']) / 5
m3 = (pf.loc['Mar'] + pf.loc['Apr'] + pf.loc['May'] + pf.loc['Jun'] + pf.loc['Jul']) / 5
m4 = (pf.loc['Apr'] + pf.loc['May'] + pf.loc['Jun'] + pf.loc['Jul'] + pf.loc['Aug']) / 5
m5 = (pf.loc['May'] + pf.loc['Jun'] + pf.loc['Jul'] + pf.loc['Aug'] + pf.loc['Sep']) / 5
m6 = (pf.loc['Jun'] + pf.loc['Jul'] + pf.loc['Aug'] + pf.loc['Sep'] + pf.loc['Oct']) / 5
m7 = (pf.loc['Jul'] + pf.loc['Aug'] + pf.loc['Sep'] + pf.loc['Oct'] + pf.loc['Nov']) / 5
m8 = (pf.loc['Aug'] + pf.loc['Sep'] + pf.loc['Oct'] + pf.loc['Nov'] + pf.loc['Dec']) / 5

# Concatenate the results into a single DataFrame
df = pd.concat([m1, m2, m3, m4, m5, m6, m7, m8], axis=1)

# Set appropriate column names
df.columns = range(1, 9)

# Add a 'Year' column and set it as the index
df['Year'] = range(2003, 2024)

# Transform the DataFrame from wide to long format
df_melted = df.melt(id_vars=["Year"], var_name="Month", value_name="Value")

# Sort the DataFrame by 'Year' and 'Month'
b = df_melted.sort_values(by=['Year', 'Month'])

# Convert 'Year' and 'Month' to datetime format
b["Date"] = pd.to_datetime(b['Year'].astype(str) + '-' + b['Month'].astype(str), format='%Y-%m')

# Print the first few rows of the melted DataFrame for verification
print(b.head())
    Year Month     Value       Date
0   2003     1 -0.019892 2003-01-01
21  2003     2 -0.037762 2003-02-01
42  2003     3 -0.060655 2003-03-01
63  2003     4 -0.050009 2003-04-01
84  2003     5 -0.052912 2003-05-01
In [ ]:
plt.figure(figsize=(18, 5))

# Plot the continuous line
plt.plot(b['Date'], b['Value'], linestyle='-', color='black')
plt.fill_between(b['Date'], b['Value'],where=b['Value']>0, color='r', alpha=0.6)
plt.fill_between(b['Date'], b['Value'],where=b['Value']<0, color='b', alpha=0.6)
# Adding title and labels
plt.title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Date')
plt.ylabel('precepitation mm/hr')
 # Rotate x-axis labels for better readability

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
#create a data frame and read the DMI data 

dmi = pd.read_csv('DMI.csv')
dmi['Date'] = pd.to_datetime(dmi['Date'], format='%Y-%m-%d')
plt.figure(figsize=(18, 5))
plt.plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
plt.fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
plt.fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
plt.title('Dipole Mode Index ')
plt.xlabel('Year')
plt.ylabel('DMI Value')

# Add gridlines
plt.grid(True)
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the data
onia = pd.read_csv('oni_data.csv')

onia = onia[onia['Year'] > 2002]
# Create the plot
onia["Month"] = [i%12+1 for i in range(len(onia))]
onia.info()
plt.figure(figsize=(18, 5))

# Convert 'Year' and 'Month' to datetime format
onia["Date"] = pd.to_datetime(onia['Year'].astype(str) + '-' + onia['Month'].astype(str), format='%Y-%m')

# Plot the ONI data
plt.plot(onia['Date'], onia["ANOM"], color='black', marker='', label='ONI')

# Fill the positive and negative areas with different colors
plt.fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
plt.fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
plt.title('Oceanic Niño Index')
plt.xlabel('Year')
plt.ylabel('ONI Value')

# Add gridlines
plt.grid(True)

# Show the plot
plt.legend()
plt.show()

print(len(onia))
<class 'pandas.core.frame.DataFrame'>
Index: 257 entries, 636 to 892
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   SEAS    257 non-null    object 
 1   Year    257 non-null    int64  
 2   Total   257 non-null    float64
 3   ANOM    257 non-null    float64
 4   Month   257 non-null    int64  
dtypes: float64(2), int64(2), object(1)
memory usage: 12.0+ KB
257
In [ ]:
# Read the CSV file
PDO = pd.read_csv('PDO.csv')
print(PDO.head())

# Convert 'Date' to datetime format
PDO['Date'] = pd.to_datetime(PDO['Date'], format='%Y-%m-%d')

# Filter the DataFrame for the years 2002 to 2023
PDO = PDO[(PDO['Year'] > 2002) & (PDO['Year'] < 2024)]

# Plot the data of PDO
plt.figure(figsize=(18, 5))
plt.plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
plt.fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
plt.fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
plt.title('Pacific Decadal Oscillation')
plt.xlabel('Year')
plt.ylabel('PDO Value')

# Add gridlines
plt.grid(True)

# Show the plot
plt.legend()
plt.show()
   Year Month   PDO        Date
0  1854   Jan  0.11  1854-01-01
1  1854   Feb -0.24  1854-02-01
2  1854   Mar -0.40  1854-03-01
3  1854   Apr -0.44  1854-04-01
4  1854   May -0.54  1854-05-01
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# Assuming amamoly is already defined
data = {
    'index': ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    '2003': amamoly[2003],
    '2004': amamoly[2004],
    '2005': amamoly[2005],
    '2006': amamoly[2006],
    '2007': amamoly[2007],
    '2008': amamoly[2008],
    '2009': amamoly[2009],
    '2010': amamoly[2010],
    '2011': amamoly[2011],
    '2012': amamoly[2012],
    '2013': amamoly[2013],
    '2014': amamoly[2014],
    '2015': amamoly[2015],
    '2016': amamoly[2016],
    '2017': amamoly[2017],
    '2018': amamoly[2018],
    '2019': amamoly[2019],
    '2020': amamoly[2020],
    '2021': amamoly[2021],
    '2022': amamoly[2022],
    '2023': amamoly[2023]
}

# Create DataFrame
df = pd.DataFrame(data)

# Melt the DataFrame to long format
df_longs = df.melt(id_vars=['index'], var_name='Year', value_name='Value')

# Convert 'index' to datetime
df_longs['Date'] = pd.to_datetime(df_longs['Year'] + '-' + df_longs['index'], format='%Y-%b')

# Sort by date to ensure continuous line plot
df_longs = df_longs.sort_values(by='Date')

# Calculate 3-month moving average
df_longs['3_month_MA'] = df_longs['Value'].rolling(window=3, min_periods=1).mean()


# Initialize the matplotlib figure
plt.figure(figsize=(18, 5))

# Plot the continuous line
plt.plot(df_longs['Date'], df_longs['3_month_MA'], marker='', linestyle='-', color='black')
plt.fill_between(df_longs['Date'], df_longs['3_month_MA'],where=df_longs['3_month_MA']>0, color='red', alpha=0.6,interpolate=True)
plt.fill_between(df_longs['Date'], df_longs['3_month_MA'],where=df_longs['3_month_MA']<0, color='blue', alpha=0.6,interpolate=True)


# Adding title and labels
plt.title('3-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('precepitation mm/hr')
  # Rotate x-axis labels for better readability

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
df = pd.DataFrame(data)

# Melt the DataFrame to long format
df_long = df.melt(id_vars=['index'], var_name='Year', value_name='Value')

# Convert 'index' to datetime
df_long['Date'] = pd.to_datetime(df_long['Year'] + '-' + df_long['index'], format='%Y-%b')

# Sort by date to ensure continuous line plot
df_long = df_long.sort_values(by='Date')

# Calculate 3-month moving average
df_long['5_month_MA'] = df_long['Value'].rolling(window=5, min_periods=1).mean()


# Initialize the matplotlib figure
plt.figure(figsize=(18, 5))

# Plot the continuous line
plt.plot(df_long['Date'], df_long['5_month_MA'], marker='', linestyle='-', color='black')
plt.fill_between(df_long['Date'], df_long['5_month_MA'],where=df_long['5_month_MA']>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], df_long['5_month_MA'],where=df_long['5_month_MA']<0, color='b', alpha=0.6)

# Adding title and labels
plt.title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('precepitation mm/hr')

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
# Calculate 3-month moving average
df_long['7_month_MA'] = df_long['Value'].rolling(window=7, min_periods=1).mean()


# Initialize the matplotlib figure
plt.figure(figsize=(18, 5))

# Plot the continuous line
plt.plot(df_long['Date'], df_long['7_month_MA'], marker='', linestyle='-', color='black')
plt.fill_between(df_long['Date'], df_long['7_month_MA'],where=df_long['7_month_MA']>0, color='r', alpha=0.6)
plt.fill_between(df_long['Date'], df_long['7_month_MA'],where=df_long['7_month_MA']<0, color='b', alpha=0.6)

# Adding title and labels
plt.title('7-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
plt.xlabel('Year')
plt.ylabel('precepitation mm/hr')

# Show the plot
plt.grid(True)
plt.show()
In [ ]:
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()

# Plot the second graph in the second subplot
axs[1].plot(df_longss['Date'], df_longss['Filtered_Value3'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
axs[1].fill_between(df_longss['Date'], df_longss['Filtered_Value3'], where=df_longss['Filtered_Value3'] > 0, color='red', alpha=0.6)
axs[1].fill_between(df_longss['Date'],df_longss['Filtered_Value3'], where=df_longss['Filtered_Value3'] < 0, color='blue', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2021 with KZ Filter 3')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('precepitation mm/hr')
axs[1].grid(True)
axs[1].legend()
 # Rotate x-axis labels for better readability

axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')


# Add gridlines
axs[2].grid(True)

# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')

# Add gridlines
axs[3].grid(True)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()

# Plot the second graph in the second subplot
axs[1].plot(df_longss['Date'], df_longss['Filtered_Value5'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
axs[1].fill_between(df_longss['Date'], df_longss['Filtered_Value5'], where=df_longss['Filtered_Value5'] > 0, color='red', alpha=0.6)
axs[1].fill_between(df_longss['Date'],df_longss['Filtered_Value5'], where=df_longss['Filtered_Value5'] < 0, color='blue', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2021 with KZ Filter 5')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('precepitation mm/hr')
axs[1].grid(True)
axs[1].legend()
 # Rotate x-axis labels for better readability

axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')


# Add gridlines
axs[2].grid(True)

# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')

# Add gridlines
axs[3].grid(True)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()

# Plot the second graph in the second subplot
axs[1].plot(df_longss['Date'], df_longss['Filtered_Value7'], marker='', linestyle='-', color='black', linewidth=2, label='KZ Filtered Value')
axs[1].fill_between(df_longss['Date'], df_longss['Filtered_Value7'], where=df_longss['Filtered_Value7'] > 0, color='red', alpha=0.6)
axs[1].fill_between(df_longss['Date'],df_longss['Filtered_Value7'], where=df_longss['Filtered_Value7'] < 0, color='blue', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2021 with KZ Filter 7')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('precepitation mm/hr')
axs[1].grid(True)
axs[1].legend()
 # Rotate x-axis labels for better readability

axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')


# Add gridlines
axs[2].grid(True)

# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')

# Add gridlines
axs[3].grid(True)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()

# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'], smoothed_values1, color='black', label='Smoothed Value')
axs[1].fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1>0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1<0, color='b', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 3')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('precepitation mm/hr')
axs[1].grid(True)

axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')

# Add gridlines
axs[2].grid(True)

# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')

# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()

# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'], smoothed_values2, color='black', label='Smoothed Value')
axs[1].fill_between(df_long['Date'], smoothed_values2,where=smoothed_values2>0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], smoothed_values2,where=smoothed_values2<0, color='b', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 5')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('precepitation mm/hr')
axs[1].grid(True)

axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')

# Add gridlines
axs[2].grid(True)

# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')

# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()

# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'], smoothed_values3, color='black', label='Smoothed Value')
axs[1].fill_between(df_long['Date'], smoothed_values3,where=smoothed_values3>0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], smoothed_values3,where=smoothed_values3<0, color='b', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 7')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('precepitation mm/hr')
axs[1].grid(True)

axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')

# Add gridlines
axs[2].grid(True)

# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')

# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()

# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'], Tsmoothed_values, color='black')
axs[1].fill_between(df_long['Date'], Tsmoothed_values,where=Tsmoothed_values>0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], Tsmoothed_values,where=Tsmoothed_values<0, color='b', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing 3')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('precepitation mm/hr')
axs[1].grid(True)
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')

# Add gridlines
axs[2].grid(True)

# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')

# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()

# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'], Tsmoothed_values5, color='black')
axs[1].fill_between(df_long['Date'], Tsmoothed_values5,where=Tsmoothed_values5>0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], Tsmoothed_values5,where=Tsmoothed_values5<0, color='b', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing 5')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('precepitation mm/hr')
axs[1].grid(True)
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')

# Add gridlines
axs[2].grid(True)

# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')

# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()

# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'], Tsmoothed_values7, color='black')
axs[1].fill_between(df_long['Date'], Tsmoothed_values7,where=Tsmoothed_values7>0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], Tsmoothed_values7,where=Tsmoothed_values7<0, color='b', alpha=0.6)
axs[1].set_title('Monthly Time Series Data from 2003 to 2023 with Triangle Smoothing 7')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('precepitation mm/hr')
axs[1].grid(True)
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')

# Add gridlines
axs[2].grid(True)

# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')

# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()

# Plot the second graph in the second subplot
axs[1].plot(b['Date'], b['Value'], linestyle='-', color='black')
axs[1].fill_between(b['Date'], b['Value'], where=b['Value'] > 0, color='r', alpha=0.6)
axs[1].fill_between(b['Date'], b['Value'], where=b['Value'] < 0, color='b', alpha=0.6)
axs[1].set_title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('precepitation mm/hr')
axs[1].grid(True)


# Plot the third graph in the third subplot
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')

# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')

# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()

# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'],df_long['5_month_MA'], linestyle='-', color='black')
axs[1].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] < 0, color='b', alpha=0.6)
axs[1].set_title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Value')
axs[1].grid(True)


# Plot the third graph in the third subplot
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')

# Add gridlines
axs[2].grid(True)

# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')

# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(4, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].plot(dmi['Date'], dmi['PDO'], color='blue', alpha=0.7, label='PDO')
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(dmi['Date'], dmi['PDO'], 0, where=(dmi['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)
axs[0].set_title('Dipole Mode Index')
axs[0].set_xlabel('Date')
axs[0].set_ylabel('DMI Value')
axs[0].grid(True)
axs[0].legend()

# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'],df_long['7_month_MA'], linestyle='-', color='black')
axs[1].fill_between(df_long['Date'], df_long['7_month_MA'], where=df_long['7_month_MA'] > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], df_long['7_month_MA'], where=df_long['7_month_MA'] < 0, color='b', alpha=0.6)
axs[1].set_title('7-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Value')
axs[1].grid(True)


# Plot the third graph in the third subplot
axs[2].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[2].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[2].set_title('Oceanic Niño Index')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('ONI Value')

# Add gridlines
axs[2].grid(True)
# Plot the data of PDO
axs[3].plot(PDO['Date'], PDO['PDO'], color='blue', alpha=0.7, label='PDO')

# Fill the positive and negative areas with different colors
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[3].fill_between(PDO['Date'], PDO['PDO'], 0, where=(PDO['PDO'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[3].set_title('Pacific Decadal Oscillation')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('PDO Value')

# Add gridlines
axs[3].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(5, 1, figsize=(18, 13))

# Plot the third graph in the third subplot
axs[0].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[0].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[0].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[0].set_title('Oceanic Niño Index')
axs[0].set_xlabel('Year')
axs[0].set_ylabel('ONI Value')

# Add gridlines
axs[0].grid(True)


# Plot the second graph in the second subplot
axs[1].plot(df_long['Date'],df_long['7_month_MA'], linestyle='-', color='black')
axs[1].fill_between(df_long['Date'], df_long['7_month_MA'], where=df_long['7_month_MA'] > 0, color='r', alpha=0.6)
axs[1].fill_between(df_long['Date'], df_long['7_month_MA'], where=df_long['7_month_MA'] < 0, color='b', alpha=0.6)
axs[1].set_title('7-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('precepitation mm/hr')
axs[1].grid(True)

axs[2].plot(df_long['Date'],df_long['5_month_MA'], linestyle='-', color='black')
axs[2].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] > 0, color='r', alpha=0.6)
axs[2].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] < 0, color='b', alpha=0.6)
axs[2].set_title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
axs[2].set_xlabel('Year')
axs[2].set_ylabel('precepitation mm/hr')
axs[2].grid(True)

# Plot the second graph in the second subplot
axs[3].plot(df_long['Date'], smoothed_values1, color='black', label='Smoothed Value')
axs[3].fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1>0, color='r', alpha=0.6)
axs[3].fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1<0, color='b', alpha=0.6)
axs[3].set_title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 3')
axs[3].set_xlabel('Year')
axs[3].set_ylabel('precepitation mm/hr')
axs[3].grid(True)

axs[4].plot(df_longss['Date'],df_longss['Filtered_Value5'], linestyle='-', color='black')
axs[4].fill_between(df_longss['Date'], df_longss['Filtered_Value5'], where=df_longss['Filtered_Value5'] > 0, color='r', alpha=0.6)
axs[4].fill_between(df_longss['Date'], df_longss['Filtered_Value5'], where=df_longss['Filtered_Value5'] < 0, color='b', alpha=0.6)
axs[4].set_title('Monthly Time Series Data from 2003 to 2021 with KZ Filter 5')
axs[4].set_xlabel('Year')
axs[4].set_ylabel('precepitation mm/hr')
axs[4].grid(True)
# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(2, 1, figsize=(18, 5))



# Plot the second graph in the second subplot
axs[0].plot(df_long['Date'],df_long['5_month_MA'], linestyle='-', color='black')
axs[0].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] > 0, color='r', alpha=0.6)
axs[0].fill_between(df_long['Date'], df_long['5_month_MA'], where=df_long['5_month_MA'] < 0, color='b', alpha=0.6)
axs[0].set_title('5-Month Moving Average of Monthly Time Series Data from 2003 to 2023')
axs[0].set_xlabel('Year')
axs[0].set_ylabel('Value')
axs[0].grid(True)


# Plot the third graph in the third subplot
axs[1].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[1].set_title('Oceanic Niño Index')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('ONI Value')

# Add gridlines
axs[1].grid(True)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(2, 1, figsize=(18, 5))



# Plot the second graph in the second subplot
axs[0].plot(df_longss['Date'],df_longss['Filtered_Value5'], linestyle='-', color='black')
axs[0].fill_between(df_longss['Date'], df_longss['Filtered_Value5'], where=df_longss['Filtered_Value5'] > 0, color='r', alpha=0.6)
axs[0].fill_between(df_longss['Date'], df_longss['Filtered_Value5'], where=df_longss['Filtered_Value5'] < 0, color='b', alpha=0.6)
axs[0].set_title('Monthly Time Series Data from 2003 to 2021 with KZ Filter 5')
axs[0].set_xlabel('Year')
axs[0].set_ylabel('Value')
axs[0].grid(True)


# Plot the third graph in the third subplot
axs[1].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[1].set_title('Oceanic Niño Index')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('ONI Value')

# Add gridlines
axs[1].grid(True)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
fig, axs = plt.subplots(2, 1, figsize=(18, 5))



# Plot the second graph in the second subplot
axs[0].plot(df_long['Date'], smoothed_values1, color='black', label='Smoothed Value')
axs[0].fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1>0, color='r', alpha=0.6)
axs[0].fill_between(df_long['Date'], smoothed_values1,where=smoothed_values1<0, color='b', alpha=0.6)
axs[0].set_title('Monthly Time Series Data from 2003 to 2023 with Gaussian Smoothing 3')
axs[0].set_xlabel('Year')
axs[0].set_ylabel('precepitation mm/hr')
axs[0].grid(True)


# Plot the third graph in the third subplot
axs[1].plot(onia['Date'], onia["ANOM"], color='blue', marker='', label='ONI')

# Fill the positive and negative areas with different colors
axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] >= 0), interpolate=True, color='red', alpha=0.6)
axs[1].fill_between(onia['Date'], onia['ANOM'], 0, where=(onia['ANOM'] < 0), interpolate=True, color='blue', alpha=0.6)

# Adding labels and title
axs[1].set_title('Oceanic Niño Index')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('ONI Value')

# Add gridlines
axs[1].grid(True)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
p = PDO['PDO'].values
dfss = df_long
dfss['PDO'] = p
dfss.head()
onia = onia[onia['Year'] < 2024]
k = onia['ANOM'].values
dfss['ONI'] = k

r = dmi['PDO'].values
dfss['DMI'] = r

s= df_longs['3_month_MA'].values
dfss['3_month_MA'] = s

dfss["triangular_smoothing3"] = Tsmoothed_values
dfss["triangular_smoothing5"] = Tsmoothed_values5
dfss["triangular_smoothing7"] = Tsmoothed_values7
dfss["gaussian_smoothing3"] = smoothed_values1
dfss["gaussian_smoothing5"] = smoothed_values2
dfss["gaussian_smoothing7"] = smoothed_values3
q = df_longss['Filtered_Value3'].values
dfss['KZ_Filter3'] = q
z =df_longss['Filtered_Value5'].values
dfss['KZ_Filter5'] = z
x = df_longss['Filtered_Value7'].values
dfss['KZ_Filter7'] = x
sz = df_long['7_month_MA'].values
dfss['7_month_MA'] = sz
In [ ]:
l= df_long['5_month_MA'].values
dfss['5_month_MA'] = l
In [ ]:
#corelation between PDO and 5 month moving average
dfss['5_month_MA'].corr(dfss['PDO'])
Out[ ]:
-0.397567276986067
In [ ]:
dfss['5_month_MA'].corr(dfss['ONI'])
Out[ ]:
-0.7714410169199405
In [ ]:
dfss['5_month_MA'].corr(dfss['DMI'])
Out[ ]:
-0.36837887683100584
In [ ]:
dfss['5_month_MA'].corr(dfss['PDO'])
Out[ ]:
-0.397567276986067
In [ ]:
dfss['triangular_smoothing3'].corr(dfss['PDO'])
Out[ ]:
-0.3233588011759541
In [ ]:
dfss['triangular_smoothing3'].corr(dfss['ONI'])
Out[ ]:
-0.6702292059163077
In [ ]:
dfss['triangular_smoothing3'].corr(dfss['DMI'])
Out[ ]:
-0.4306284113519841
In [ ]:
print(dfss['triangular_smoothing5'].corr(dfss['PDO']))
print(dfss['triangular_smoothing5'].corr(dfss['ONI']))
print(dfss['triangular_smoothing5'].corr(dfss['DMI']))
-0.3559615975993508
-0.7048183842190328
-0.434688287468563
In [ ]:
print(dfss['triangular_smoothing7'].corr(dfss['PDO']))
print(dfss['triangular_smoothing7'].corr(dfss['ONI']))
print(dfss['triangular_smoothing7'].corr(dfss['DMI']))
-0.37753744506375353
-0.7241869895220205
-0.43177332944740954
In [ ]:
print(dfss['gaussian_smoothing3'].corr(dfss['PDO']))
print(dfss['gaussian_smoothing3'].corr(dfss['ONI']))
print(dfss['gaussian_smoothing3'].corr(dfss['DMI']))
-0.42156596008633823
-0.7565829698989535
-0.39914548185486776
In [ ]:
print(dfss['gaussian_smoothing5'].corr(dfss['PDO']))
print(dfss['gaussian_smoothing5'].corr(dfss['ONI']))
print(dfss['gaussian_smoothing5'].corr(dfss['DMI']))
-0.48106799962223296
-0.7343367545256421
-0.3069307687397145
In [ ]:
print(dfss['gaussian_smoothing7'].corr(dfss['PDO']))
print(dfss['gaussian_smoothing7'].corr(dfss['ONI']))
print(dfss['gaussian_smoothing7'].corr(dfss['DMI']))
-0.5264503493640501
-0.6789521697552502
-0.21449888160440037
In [ ]:
print(dfss['KZ_Filter3'].corr(dfss['PDO']))
print(dfss['KZ_Filter3'].corr(dfss['ONI']))
print(dfss['KZ_Filter3'].corr(dfss['DMI']))
-0.3992976253520283
-0.7104839786261486
-0.4093174457233314
In [ ]:
print(dfss['KZ_Filter5'].corr(dfss['PDO']))
print(dfss['KZ_Filter5'].corr(dfss['ONI']))
print(dfss['KZ_Filter5'].corr(dfss['DMI']))
-0.47341211867694794
-0.7407472307036023
-0.37484337107430393
In [ ]:
print(dfss['KZ_Filter7'].corr(dfss['PDO']))
print(dfss['KZ_Filter7'].corr(dfss['ONI']))
print(dfss['KZ_Filter7'].corr(dfss['DMI']))
-0.5253820424791045
-0.7445675829844174
-0.3244072664842779
In [ ]:
print(dfss['7_month_MA'].corr(dfss['PDO']))
print(dfss['7_month_MA'].corr(dfss['ONI']))
print(dfss['7_month_MA'].corr(dfss['DMI']))
-0.44005512412229053
-0.7602157168323691
-0.2780914608881651
In [ ]:
print(dfss['3_month_MA'].corr(dfss['PDO']))
print(dfss['3_month_MA'].corr(dfss['ONI']))
print(dfss['3_month_MA'].corr(dfss['DMI']))
-0.35261859735597595
-0.7303401686202977
-0.4172781119915262
In [ ]:
import scipy.stats as stats
correlation, p_value = stats.pearsonr(dfss['5_month_MA'], dfss['ONI'])
print(f'Correlation between 5-month moving average and ONI: {correlation:.2f}', f'P-value: {p_value}')
Correlation between 5-month moving average and ONI: -0.77 P-value: 5.366513559816488e-51
In [ ]:
import scipy.stats as stats
correlation, p_value = stats.pearsonr(dfss['7_month_MA'], dfss['ONI'])
print(f'Correlation between 5-month moving average and ONI: {correlation:.2f}', f'P-value: {p_value}')
Correlation between 5-month moving average and ONI: -0.76 P-value: 9.855421507174027e-49
In [ ]:
#give corelation heat map
import seaborn as sns
dfsss = dfss.drop(['Year', 'index', 'Date'], axis=1)
plt.figure(figsize=(18, 10))
sns.heatmap(dfsss.corr(), annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Heatmap')

plt.show()
In [ ]:
seasonal = pd.DataFrame()
seasonal["7_month_MA"] = dfss["7_month_MA"].values
print(onia.head())
    SEAS  Year  Total  ANOM  Month       Date
636  DJF  2003  27.51  0.92      1 2003-01-01
637  JFM  2003  27.41  0.63      2 2003-02-01
638  FMA  2003  27.58  0.38      3 2003-03-01
639  MAM  2003  27.56 -0.04      4 2003-04-01
640  AMJ  2003  27.48 -0.26      5 2003-05-01
In [ ]:
import pandas as pd
import numpy as np

dates = pd.date_range(start='2003-01-01', end='2023-12-31', freq='M')

precip_data = dfss["Value"].values
# Create DataFrame
df_precip = pd.DataFrame({
    'Date': dates,
    'Precipitation': precip_data
})
In [ ]:
df_precip['7_month_MA'] = df_precip['Precipitation'].rolling(window=7, min_periods=1).mean()
In [ ]:
# Extract month from the date
df_precip['Month'] = df_precip['Date'].dt.month

# Calculate the long-term monthly averages
monthly_averages = df_precip.groupby('Month')['7_month_MA'].mean()

# Map the long-term averages back onto the DataFrame using the month
df_precip['Monthly_Average'] = df_precip['Month'].apply(lambda x: monthly_averages[x])

# Calculate anomalies
df_precip['Anomaly'] = df_precip['7_month_MA'] - df_precip['Monthly_Average']
In [ ]:
plt.figure(figsize=(12, 6))
plt.plot(df_precip['Date'], df_precip['7_month_MA'], label='Anomaly', color='blue')
plt.fill_between(df_precip['Date'], df_precip['7_month_MA'], 0, where=df_precip['7_month_MA'] >= 0, color='red', alpha=0.6)
plt.fill_between(df_precip['Date'], df_precip['7_month_MA'], 0, where=df_precip['7_month_MA'] < 0, color='blue', alpha=0.6)
plt.title('7-Month Moving Average Anomalies in Precipitation (2003-2023)')
plt.xlabel('Year')
plt.ylabel('Anomaly (mm)')
plt.axhline(0, color='red', linestyle='--', linewidth=0.8)  # Zero anomaly line
plt.legend()
plt.grid(True)
plt.show()
In [ ]:
df_precip['7_month_MA'].corr(dfss['ONI'])
Out[ ]:
-0.7602157168323691
In [ ]:
def get_season(month):
    if month in [12, 1, 2]:
        return 'DJF'  # Winter
    elif month in [3, 4, 5]:
        return 'MAM'  # Spring
    elif month in [6, 7, 8]:
        return 'JJA'  # Summer
    elif month in [9, 10, 11]:
        return 'SON'  # Autumn

# Apply the function to create a Season column
df_precip['Season'] = df_precip['Date'].dt.month.apply(get_season)
In [ ]:
def get_season(month):
    if month in [12, 1, 2]:
        return 'DJF'  # Winter
    elif month in [3, 4, 5]:
        return 'MAM'  # Spring
    elif month in [6, 7, 8]:
        return 'JJA'  # Summer
    elif month in [9, 10, 11]:
        return 'SON'  # Autumn

# Apply the function to create a Season column
df_precip['Season'] = df_precip['Date'].dt.month.apply(get_season)
In [ ]:
# Group by year and season and calculate the mean anomaly
seasonal_anomalies1 = df_precip.groupby([df_precip['Date'].dt.year, 'Season'])['Anomaly'].mean().unstack()
print(seasonal_anomalies1.head())
#swap the column data where mam should be the second and the jja should be the third and son should be the fourth
column_names = ["DJF", "MAM", "JJA", "SON"]
seasonal_anomalies = seasonal_anomalies1[column_names]
seasonal_anomalies["DJF"] = seasonal_anomalies1["DJF"]
seasonal_anomalies["MAM"] = seasonal_anomalies1["MAM"]
seasonal_anomalies["JJA"] = seasonal_anomalies1["JJA"]
seasonal_anomalies["SON"] = seasonal_anomalies1["SON"]

print(seasonal_anomalies.head())
Season       DJF       JJA       MAM       SON
Date                                          
2003   -0.020808 -0.036521 -0.012247 -0.045926
2004   -0.053504 -0.043280 -0.054735 -0.043186
2005   -0.033881 -0.023153 -0.044261  0.001096
2006   -0.012835  0.003152  0.025943 -0.047733
2007   -0.049259  0.003395 -0.043932  0.002062
Season       DJF       MAM       JJA       SON
Date                                          
2003   -0.020808 -0.012247 -0.036521 -0.045926
2004   -0.053504 -0.054735 -0.043280 -0.043186
2005   -0.033881 -0.044261 -0.023153  0.001096
2006   -0.012835  0.025943  0.003152 -0.047733
2007   -0.049259 -0.043932  0.003395  0.002062
In [ ]:
# Plot seasonal anomalies
seasonal_anomalies.plot(kind='bar', figsize=(18, 6), width=0.8)
plt.title('Seasonal Anomalies in 7-Month Moving Average of Precipitation (2003-2023)')
plt.xlabel('Year')
plt.ylabel('Anomaly (mm)')
plt.legend(title='Season')
plt.grid(axis='y', linestyle='--', linewidth=0.7)
plt.show()
In [ ]:
kss = dfss.drop(['Year', 'index','Value','5_month_MA','PDO','DMI','3_month_MA','7_month_MA','triangular_smoothing3','triangular_smoothing5','triangular_smoothing7','gaussian_smoothing3','gaussian_smoothing5','gaussian_smoothing7','KZ_Filter3','KZ_Filter5','KZ_Filter7'], axis=1)
print(kss.head())
kss["ONI"] = dfss["ONI"].values
        Date   ONI
0 2003-01-01  0.92
1 2003-02-01  0.63
2 2003-03-01  0.38
3 2003-04-01 -0.04
4 2003-05-01 -0.26
In [ ]:
# Assume df_enso is a DataFrame with ENSO data, where ONI values are monthly and have been processed similarly
# Merge ENSO data
df_full = df_precip.merge(kss, on='Date', how='left')
df_full["ONI"] = dfss["ONI"].values
print(df_full.head())
# Group by season and calculate correlations
correlations = df_full.groupby('Season').apply(lambda x: x['7_month_MA'].corr(x['ONI']))
print(correlations)
        Date  Precipitation  7_month_MA  Month  Monthly_Average   Anomaly  \
0 2003-01-31      -0.008463   -0.008463      1         0.002287 -0.010750   
1 2003-02-28       0.031241    0.011389      2         0.003192  0.008198   
2 2003-03-31      -0.033798   -0.003673      3         0.002323 -0.005996   
3 2003-04-30      -0.020413   -0.007858      4         0.001971 -0.009829   
4 2003-05-31      -0.068027   -0.019892      5         0.001024 -0.020916   

  Season   ONI  
0    DJF  0.92  
1    DJF  0.63  
2    MAM  0.38  
3    MAM -0.04  
4    MAM -0.26  
Season
DJF   -0.777837
JJA   -0.677940
MAM   -0.683357
SON   -0.812986
dtype: float64
In [ ]:
# Assuming 'seasonal_anomalies' is your DataFrame with anomalies by season
autumn_anomalies = seasonal_anomalies['SON']  # Assuming 'SON' is the column for Autumn anomalies
plt.figure(figsize=(12, 6))  # Set the figure size as desired
autumn_anomalies.plot(kind='bar', color='orange')  # Use a color that stands out for Autumn
plt.title('Autumn (SON) Seasonal Anomalies in 7-Month Moving Average of Precipitation (2003-2023)')
plt.xlabel('Year')
plt.ylabel('Anomaly (mm)')
plt.axhline(0, color='black', linestyle='--', linewidth=1)  # Zero anomaly line for reference
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()
print(autumn_anomalies)
Date
2003   -0.045926
2004   -0.043186
2005    0.001096
2006   -0.047733
2007    0.002062
2008    0.030507
2009   -0.028871
2010    0.097733
2011   -0.010385
2012   -0.008289
2013    0.043498
2014   -0.027724
2015   -0.093207
2016    0.051357
2017    0.058914
2018   -0.028862
2019   -0.070144
2020    0.057543
2021    0.036540
2022    0.063787
2023   -0.038711
Name: SON, dtype: float64
In [ ]:
import matplotlib.pyplot as plt

# Filter the data for Autumn (SON) season
Autumn_ENO = onia[onia['SEAS'] == 'SON']

# Get unique years
unique_years = Autumn_ENO['Year'].unique()

# Plot the data
plt.figure(figsize=(15, 8))
plt.bar(Autumn_ENO['Year'], Autumn_ENO['ANOM'], color='blue')
plt.title('Oceanic Niño Index in Autumn (SON) Seasons')
plt.xlabel('Year')
plt.ylabel('ONI Value')
plt.grid(True)

# Set x-ticks to unique years
plt.xticks(unique_years)
plt.show()
In [ ]:
fig, axs = plt.subplots(2, 1, figsize=(18, 10))

# Plot the first graph in the first subplot
axs[0].bar(Autumn_ENO['Year'], Autumn_ENO['ANOM'], color='blue')
axs[0].set_title('Oceanic Niño Index in Autumn (SON) Seasons')
axs[0].set_xlabel('Year')
axs[0].set_ylabel('ONI Value')
axs[0].grid(True)
axs[0].set_xticks(unique_years)
axs[0].tick_params(axis='x', rotation=90)

# Plot the second graph in the second subplot
autumn_anomalies.plot(kind='bar', ax=axs[1], color='orange')  # Use a color that stands out for Autumn
axs[1].set_title('Autumn (SON) Seasonal Anomalies in 7-Month Moving Average of Precipitation (2003-2023)')
axs[1].set_xlabel('Year')
axs[1].set_ylabel('Anomaly (mm)')
axs[1].axhline(0, color='black', linestyle='--', linewidth=1)  # Zero anomaly line for reference
axs[1].grid(axis='y', linestyle='--', alpha=0.7)

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the plot
plt.show()
In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Creating a DataFrame
data = pd.DataFrame({
    'Date': df_full['Date'],
    'ENSO': df_full["ONI"],
    'Precipitation_MA7': df_full['7_month_MA']
})
data.set_index('Date', inplace=True)

# 2. Plotting the generated data
plt.figure(figsize=(14, 6))
plt.plot(data.index, data['Precipitation_MA7'], label='7-Month MA Precipitation (mm/hr)')
plt.plot(data.index, data['ENSO'], label='ENSO Index', alpha=0.7)
plt.title('Simulated Precipitation and ENSO Index (2003-2023)')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()

# 3. Differencing the data to make it stationary
data['Precipitation_MA7_diff'] = data['Precipitation_MA7'].diff().dropna()
data['ENSO_diff'] = data['ENSO'].diff().dropna()

# Dropping NaN values resulting from differencing
data_diff = data.dropna()

# 4. Re-fitting the SARIMAX model on differenced data
model_diff = SARIMAX(data_diff['Precipitation_MA7_diff'], exog=data_diff[['ENSO_diff']], 
                     order=(1,0,1), seasonal_order=(1,0,1,12))
model_fit_diff = model_diff.fit(disp=False)

# 5. Analyzing the model
# Summary of the model
print(model_fit_diff.summary())

# Generating the residuals plot and ACF/PACF plots for the differenced data
residuals_diff = model_fit_diff.resid
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
plot_acf(residuals_diff, lags=30, ax=ax1)
plot_pacf(residuals_diff, lags=30, ax=ax2)
plt.show()

# Displaying the first few rows of the differenced data
print(data_diff.head())
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency M will be used.
  self._init_dates(dates, freq)
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency M will be used.
  self._init_dates(dates, freq)
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\base\model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
                                     SARIMAX Results                                      
==========================================================================================
Dep. Variable:             Precipitation_MA7_diff   No. Observations:                  251
Model:             SARIMAX(1, 0, 1)x(1, 0, 1, 12)   Log Likelihood                 792.111
Date:                            Sun, 25 Aug 2024   AIC                          -1572.223
Time:                                    12:48:00   BIC                          -1551.070
Sample:                                02-28-2003   HQIC                         -1563.710
                                     - 12-31-2023                                         
Covariance Type:                              opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ENSO_diff     -0.0257      0.005     -5.103      0.000      -0.036      -0.016
ar.L1          0.5611      0.192      2.915      0.004       0.184       0.938
ma.L1         -0.3072      0.220     -1.393      0.164      -0.739       0.125
ar.S.L12      -0.3452      0.303     -1.139      0.255      -0.939       0.249
ma.S.L12       0.5098      0.284      1.796      0.072      -0.047       1.066
sigma2         0.0001   1.07e-05      9.896      0.000    8.46e-05       0.000
===================================================================================
Ljung-Box (L1) (Q):                   0.82   Jarque-Bera (JB):                 0.41
Prob(Q):                              0.37   Prob(JB):                         0.81
Heteroskedasticity (H):               0.99   Skew:                             0.03
Prob(H) (two-sided):                  0.96   Kurtosis:                         2.81
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
            ENSO  Precipitation_MA7  Precipitation_MA7_diff  ENSO_diff
Date                                                                  
2003-02-28  0.63           0.011389                0.019852      -0.29
2003-03-31  0.38          -0.003673               -0.015062      -0.25
2003-04-30 -0.04          -0.007858               -0.004185      -0.42
2003-05-31 -0.26          -0.019892               -0.012034      -0.22
2003-06-30 -0.16          -0.032879               -0.012987       0.10
In [ ]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
import matplotlib.pyplot as plt

# Load your dataset
# Assuming your DataFrame is named df and has a datetime index
# df = pd.read_csv('your_data.csv', index_col='date', parse_dates=True)

# Filter the data to include only the training period
train = df_full[df_full["Date"]<'2021-01']   

# Define the model
model = SARIMAX(train['7_month_MA'], 
                exog=train['ONI'],
                order=(1, 0, 1), 
                seasonal_order=(1, 0, 1, 12),
                enforce_stationarity=False,
                enforce_invertibility=False)

# Fit the model
results = model.fit()
In [ ]:
# Predict the values of precipitation for the test period 2018-2023
# Filter the data to include only the test period
test = df_full[df_full["Date"] >= '2021-01']

print (test.head())
# Predict the values for the test period
predictions = results.get_forecast(steps=len(test), exog=test['ONI'])
predicted_mean = predictions.predicted_mean

plt.figure(figsize=(14, 6))
plt.plot(df_full['Date'], df_full['7_month_MA'], label='7-Month MA Precipitation (mm/hr)')
plt.plot(train['Date'], train['7_month_MA'], label='Training Data', color='blue')
plt.plot(test['Date'], predicted_mean, label='Predictions', color='red')
plt.title('7-Month MA Precipitation Predictions (2018-2023)')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()
print(test['7_month_MA'])
          Date  Precipitation  7_month_MA  Month  Monthly_Average   Anomaly  \
216 2021-01-31       0.115892    0.057635      1         0.002287  0.055348   
217 2021-02-28       0.078640    0.059473      2         0.003192  0.056282   
218 2021-03-31      -0.000178    0.056523      3         0.002323  0.054200   
219 2021-04-30      -0.058888    0.035828      4         0.001971  0.033857   
220 2021-05-31       0.016339    0.029131      5         0.001024  0.028107   

    Season   ONI  
216    DJF -1.05  
217    DJF -0.93  
218    MAM -0.84  
219    MAM -0.66  
220    MAM -0.48  
216    0.057635
217    0.059473
218    0.056523
219    0.035828
220    0.029131
221    0.020640
222    0.023172
223    0.021049
224    0.027064
225    0.028446
226    0.054110
227    0.058054
228    0.054751
229    0.065933
230    0.052875
231    0.034326
232    0.036375
233    0.028434
234    0.032837
235    0.053005
236    0.048436
237    0.063988
238    0.078938
239    0.081712
240    0.070313
241    0.070429
242    0.050743
243    0.034198
244    0.014656
245   -0.003346
246   -0.005922
247   -0.011845
248   -0.028761
249   -0.040912
250   -0.046461
251   -0.059636
Name: 7_month_MA, dtype: float64
In [ ]:
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import matplotlib.pyplot as plt

# Generate synthetic monthly ENSO and precipitation data

# Create DataFrame
data = pd.DataFrame({
    'Date': df_full['Date'],
    'ENSO': df_full['ONI'],
    'Precipitation': df_full['7_month_MA']
}).set_index('Date')

# Compute the cross-correlation
lags = np.arange(-7, 7)
correlations = []
for lag in lags:
    shifted_enso = data['ENSO'].shift(lag).dropna()
    aligned_precipitation = data['Precipitation'].shift(-lag).dropna()
    common_index = shifted_enso.index.intersection(aligned_precipitation.index)
    correlation = pearsonr(shifted_enso.loc[common_index], aligned_precipitation.loc[common_index])[0]
    correlations.append(correlation)

# Plot the cross-correlation
plt.figure(figsize=(10, 5))
plt.plot(lags, correlations, marker='o', linestyle='-', color='b')
plt.title('Cross-Correlation between ENSO and Precipitation')
plt.xlabel('Lag (months)')
plt.ylabel('Correlation Coefficient')
plt.axhline(0, color='black', linewidth=0.5, linestyle='--')
plt.axvline(0, color='black', linewidth=0.5, linestyle='--')
plt.grid(True)
plt.show()
In [ ]:
print(len(shifted_enso))
print(len(aligned_precipitation))
246
246
In [ ]:
print(len(df_full['7_month_MA']))
252
In [ ]:
data = pd.DataFrame({
    'Date': df_full['Date'],
    'ENSO': df_full['ONI'],
    'Precipitation': df_full['7_month_MA']
}).set_index('Date')
lag = 1
shifted_enso = data['ENSO'].shift(lag).dropna()
aligned_precipitation = data['Precipitation'].shift(-lag).dropna()
common_index = shifted_enso.index.intersection(aligned_precipitation.index)
correlation = pearsonr(shifted_enso.loc[common_index], aligned_precipitation.loc[common_index])[0]
correlations.append(correlation)
print(correlation)
-0.7738119187015284
In [ ]:
data = pd.DataFrame({
    'Date': df_full['Date'],
    'ENSO': df_full['ONI'],
    'Precipitation': df_full['7_month_MA']
}).set_index('Date')
lag = 0
shifted_enso = data['ENSO'].shift(lag).dropna()
aligned_precipitation = data['Precipitation'].shift(-lag).dropna()
common_index = shifted_enso.index.intersection(aligned_precipitation.index)
correlation1 = pearsonr(shifted_enso.loc[common_index], aligned_precipitation.loc[common_index])[0]
correlations.append(correlation1)
print(correlation1)
-0.7602157168323697
In [ ]:
print(correlation-correlation1)
-0.013596201869158775
In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import r2_score

# Load the data
data = pd.read_csv('pci.csv', index_col='Date', parse_dates=True)
data_2023 = data[data.index.year == 2023]
print(data_2023.head())
           index  Year     Value
Date                            
2023-01-01   Jan  2023  0.338154
2023-02-01   Feb  2023  0.419176
2023-03-01   Mar  2023  0.282736
2023-04-01   Apr  2023  0.254354
2023-05-01   May  2023  0.269012
In [ ]:
# Split the data into training and testing sets
train_data = data[:'2020']
test_data21 = data[data.index.year == 2021]
test_data22 = data[data.index.year == 2022]
test_data23 = data[data.index.year == 2023]

# Define and fit the SARIMA model
model = SARIMAX(train_data['Value'], order=(3, 0, 8), seasonal_order=(2, 1, 0, 12))
fitted_model = model.fit(disp=False)

# Forecast for the year 2023
forecast = fitted_model.get_forecast(steps=12)
forecast_mean = forecast.predicted_mean
forecast_conf_int = forecast.conf_int()

# Calculate the R² value
r2 = r2_score(test_data22['Value'], forecast_mean)

# Create a DataFrame for comparison
comparison_df = pd.DataFrame({
    'Actual': test_data22['Value'],
    'Forecasted': forecast_mean
})

# Plot the actual vs forecasted values for 2023
plt.figure(figsize=(14, 7))
plt.plot(comparison_df.index, comparison_df['Actual'], marker='o', label='Actual Precipitation', color='red')
plt.plot(comparison_df.index, comparison_df['Forecasted'], marker='x', linestyle='--', label='Forecasted Precipitation', color='green')
plt.title('Actual vs Forecasted Precipitation for 2023')
plt.xlabel('Month')
plt.ylabel('Precipitation (mm/hr)')
plt.xticks(comparison_df.index, [date.strftime('%b') for date in comparison_df.index], rotation=45)
plt.legend()
plt.grid(True)
plt.show()

# Print the R² value
print(f'R² value: {r2}')
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency MS will be used.
  self._init_dates(dates, freq)
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:473: ValueWarning: No frequency information was provided, so inferred frequency MS will be used.
  self._init_dates(dates, freq)
c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\base\model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
R² value: -1.251487641412906
In [ ]:
def adf_test(dataset):
    dftest = adfuller(dataset, autolag = 'AIC')
    print("1. ADF : ",dftest[0])
    print("2. P-Value : ", dftest[1])
    print("3. Num Of Lags : ", dftest[2])
    print("4. Num Of Observations Used For ADF Regression and Critical Values Calculation :", dftest[3])
    print("5. Critical Values :")
    for key, val in dftest[4].items():
        print("\t",key, ": ", val)
In [ ]:
adf_test(train_data['Value'])
1. ADF :  -3.880195537739565
2. P-Value :  0.002185529158256005
3. Num Of Lags :  13
4. Num Of Observations Used For ADF Regression and Critical Values Calculation : 202
5. Critical Values :
	 1% :  -3.4631437906252636
	 5% :  -2.8759570379821047
	 10% :  -2.574454682874228
In [ ]:
d = pd.DataFrame()
d["diff"] = train_data["Value"].diff(12)

d["diff"].plot()

adf_test(d["diff"].dropna())

print(d["diff"])
1. ADF :  -3.4871391650542938
2. P-Value :  0.008323596340899617
3. Num Of Lags :  15
4. Num Of Observations Used For ADF Regression and Critical Values Calculation : 188
5. Critical Values :
	 1% :  -3.465620397124192
	 5% :  -2.8770397560752436
	 10% :  -2.5750324547306476
Date
2003-01-01         NaN
2003-02-01         NaN
2003-03-01         NaN
2003-04-01         NaN
2003-05-01         NaN
                ...   
2020-08-01    0.121426
2020-09-01    0.209610
2020-10-01    0.142927
2020-11-01    0.201913
2020-12-01    0.116186
Name: diff, Length: 216, dtype: float64
In [ ]:
from pmdarima import auto_arima
import warnings
warnings.filterwarnings("ignore")
In [ ]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Plot ACF and PACF
plot_acf(train_data['Value'].diff(12).dropna(), lags=40)
plot_pacf(train_data['Value'].diff(12).dropna(), lags=40)
plt.show()
In [ ]:
import itertools
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Define the p, d, q, P, D, Q, m ranges to search over
p = q = P = Q = range(0, 3)  # typically, start with small values like 0, 1, 2
d = D = [0, 1]
m = [12]  # Monthly data, so seasonality period is 12 months

# Create a list of all possible combinations of parameters
parameters = list(itertools.product(p, d, q, P, D, Q, m))

best_aic = float("inf")
best_params = None

# Iterate over all parameter combinations
for params in parameters:
    try:
        model = SARIMAX(train_data["Value"], 
                        order=(params[0], params[1], params[2]), 
                        seasonal_order=(params[3], params[4], params[5], params[6])
                       ).fit(disp=False)
        if model.aic < best_aic:
            best_aic = model.aic
            best_params = params
    except:
        continue

print(f"Best Parameters: {best_params} with AIC: {best_aic}")
Best Parameters: (1, 1, 2, 1, 0, 1, 12) with AIC: -596.1817683487355
In [ ]:
stepwise_fit = auto_arima(train_data['Value'], start_p=0, start_q=0,
                          max_p=4, max_q=4, m=12,
                          seasonal=True,
                          d=None, D=1, trace=True,
                          error_action='ignore',  # don't want to know if an order does not work
                          suppress_warnings=True,  # don't want convergence warnings
                          stepwise=True)  # set to stepwise
stepwise_fit.summary()
Performing stepwise search to minimize aic
 ARIMA(0,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=0.64 sec
 ARIMA(0,0,0)(0,1,0)[12] intercept   : AIC=-387.387, Time=0.08 sec
 ARIMA(1,0,0)(1,1,0)[12] intercept   : AIC=-482.037, Time=0.60 sec
 ARIMA(0,0,1)(0,1,1)[12] intercept   : AIC=inf, Time=0.84 sec
 ARIMA(0,0,0)(0,1,0)[12]             : AIC=-388.893, Time=0.05 sec
 ARIMA(1,0,0)(0,1,0)[12] intercept   : AIC=-448.519, Time=0.12 sec
 ARIMA(1,0,0)(2,1,0)[12] intercept   : AIC=-514.842, Time=1.19 sec
 ARIMA(1,0,0)(2,1,1)[12] intercept   : AIC=inf, Time=3.07 sec
 ARIMA(1,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=0.92 sec
 ARIMA(0,0,0)(2,1,0)[12] intercept   : AIC=-472.323, Time=0.74 sec
 ARIMA(2,0,0)(2,1,0)[12] intercept   : AIC=-536.507, Time=2.17 sec
 ARIMA(2,0,0)(1,1,0)[12] intercept   : AIC=-504.196, Time=0.53 sec
 ARIMA(2,0,0)(2,1,1)[12] intercept   : AIC=inf, Time=2.98 sec
 ARIMA(2,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=1.10 sec
 ARIMA(3,0,0)(2,1,0)[12] intercept   : AIC=-536.590, Time=2.91 sec
 ARIMA(3,0,0)(1,1,0)[12] intercept   : AIC=-505.133, Time=0.68 sec
 ARIMA(3,0,0)(2,1,1)[12] intercept   : AIC=inf, Time=3.20 sec
 ARIMA(3,0,0)(1,1,1)[12] intercept   : AIC=inf, Time=1.67 sec
 ARIMA(4,0,0)(2,1,0)[12] intercept   : AIC=-534.605, Time=2.87 sec
 ARIMA(3,0,1)(2,1,0)[12] intercept   : AIC=-534.586, Time=3.32 sec
 ARIMA(2,0,1)(2,1,0)[12] intercept   : AIC=-536.259, Time=3.06 sec
 ARIMA(4,0,1)(2,1,0)[12] intercept   : AIC=-532.613, Time=3.06 sec
 ARIMA(3,0,0)(2,1,0)[12]             : AIC=-538.484, Time=1.03 sec
 ARIMA(3,0,0)(1,1,0)[12]             : AIC=-507.042, Time=0.57 sec
 ARIMA(3,0,0)(2,1,1)[12]             : AIC=inf, Time=3.38 sec
 ARIMA(3,0,0)(1,1,1)[12]             : AIC=inf, Time=1.11 sec
 ARIMA(2,0,0)(2,1,0)[12]             : AIC=-538.391, Time=0.96 sec
 ARIMA(4,0,0)(2,1,0)[12]             : AIC=-536.499, Time=1.76 sec
 ARIMA(3,0,1)(2,1,0)[12]             : AIC=-536.480, Time=1.17 sec
 ARIMA(2,0,1)(2,1,0)[12]             : AIC=-538.152, Time=1.26 sec
 ARIMA(4,0,1)(2,1,0)[12]             : AIC=-535.119, Time=2.18 sec

Best model:  ARIMA(3,0,0)(2,1,0)[12]          
Total fit time: 49.254 seconds
Out[ ]:
SARIMAX Results
Dep. Variable: y No. Observations: 216
Model: SARIMAX(3, 0, 0)x(2, 1, 0, 12) Log Likelihood 275.242
Date: Sun, 25 Aug 2024 AIC -538.484
Time: 12:55:00 BIC -518.575
Sample: 01-01-2003 HQIC -530.431
- 12-01-2020
Covariance Type: opg
coef std err z P>|z| [0.025 0.975]
ar.L1 0.2698 0.059 4.542 0.000 0.153 0.386
ar.L2 0.3031 0.074 4.110 0.000 0.159 0.448
ar.L3 0.1016 0.079 1.290 0.197 -0.053 0.256
ar.S.L12 -0.5436 0.067 -8.130 0.000 -0.675 -0.413
ar.S.L24 -0.4087 0.074 -5.554 0.000 -0.553 -0.264
sigma2 0.0038 0.000 9.912 0.000 0.003 0.005
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 0.01
Prob(Q): 0.96 Prob(JB): 1.00
Heteroskedasticity (H): 1.19 Skew: -0.01
Prob(H) (two-sided): 0.48 Kurtosis: 3.02


Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [ ]:
stepwise_fit.summary()
Out[ ]:
SARIMAX Results
Dep. Variable: y No. Observations: 216
Model: SARIMAX(3, 0, 0)x(2, 1, 0, 12) Log Likelihood 275.242
Date: Sun, 25 Aug 2024 AIC -538.484
Time: 12:55:00 BIC -518.575
Sample: 01-01-2003 HQIC -530.431
- 12-01-2020
Covariance Type: opg
coef std err z P>|z| [0.025 0.975]
ar.L1 0.2698 0.059 4.542 0.000 0.153 0.386
ar.L2 0.3031 0.074 4.110 0.000 0.159 0.448
ar.L3 0.1016 0.079 1.290 0.197 -0.053 0.256
ar.S.L12 -0.5436 0.067 -8.130 0.000 -0.675 -0.413
ar.S.L24 -0.4087 0.074 -5.554 0.000 -0.553 -0.264
sigma2 0.0038 0.000 9.912 0.000 0.003 0.005
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 0.01
Prob(Q): 0.96 Prob(JB): 1.00
Heteroskedasticity (H): 1.19 Skew: -0.01
Prob(H) (two-sided): 0.48 Kurtosis: 3.02


Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
In [ ]:
#perform the model
model = SARIMAX(train_data['Value'], order=(1, 1,2), seasonal_order=(1, 0, 1, 12))
results = model.fit()
print(results.summary())
# Forecast for the year 2023

forecast = results.get_forecast(steps=12)
forecast_mean = forecast.predicted_mean
forecast_conf_int = forecast.conf_int()

print(get_forecast(steps=12))   
# Calculate the R² value
r2 = r2_score(test_data21['Value'], forecast_mean)
print(f'R² value: {r2}')

# Create a DataFrame for comparison
comparison_df = pd.DataFrame({
    'Actual': test_data21['Value'],
    'Forecasted': forecast_mean
})

# Plot the actual vs forecasted values for 2023
plt.figure(figsize=(14, 7))
plt.plot(comparison_df.index, comparison_df['Actual'], marker='o', label='Actual Precipitation', color='red')
plt.plot(comparison_df.index, comparison_df['Forecasted'], marker='x', linestyle='--', label='Forecasted Precipitation', color='green')
plt.title('Actual vs Forecasted Precipitation for 2023')
plt.xlabel('Month')
plt.ylabel('Precipitation (mm/hr)')
plt.xticks(comparison_df.index, [date.strftime('%b') for date in comparison_df.index], rotation=45)
plt.legend()
plt.grid(True)
plt.show()
                                      SARIMAX Results                                       
============================================================================================
Dep. Variable:                                Value   No. Observations:                  216
Model:             SARIMAX(1, 1, 2)x(1, 0, [1], 12)   Log Likelihood                 304.091
Date:                              Sun, 25 Aug 2024   AIC                           -596.182
Time:                                      12:55:01   BIC                           -575.958
Sample:                                  01-01-2003   HQIC                          -588.010
                                       - 12-01-2020                                         
Covariance Type:                                opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.7935      0.099      7.979      0.000       0.599       0.988
ma.L1         -1.4008      0.140    -10.032      0.000      -1.675      -1.127
ma.L2          0.4082      0.135      3.028      0.002       0.144       0.672
ar.S.L12       0.9979      0.011     93.847      0.000       0.977       1.019
ma.S.L12      -0.9432      0.141     -6.711      0.000      -1.219      -0.668
sigma2         0.0034      0.001      6.460      0.000       0.002       0.004
===================================================================================
Ljung-Box (L1) (Q):                   0.17   Jarque-Bera (JB):                 0.86
Prob(Q):                              0.68   Prob(JB):                         0.65
Heteroskedasticity (H):               1.05   Skew:                            -0.07
Prob(H) (two-sided):                  0.83   Kurtosis:                         2.72
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[103], line 11
      8 forecast_mean = forecast.predicted_mean
      9 forecast_conf_int = forecast.conf_int()
---> 11 print(get_forecast(steps=12))   
     12 # Calculate the R² value
     13 r2 = r2_score(test_data21['Value'], forecast_mean)

NameError: name 'get_forecast' is not defined
In [ ]:
plt.plot(test_data21['Value'], label='Actual')
plt.plot(forecast_mean, label='Predicted')
Out[ ]:
[<matplotlib.lines.Line2D at 0x21318dbb2d0>]
In [ ]:
plt.plot(test_data22['Value'], label='Actual')
Out[ ]:
[<matplotlib.lines.Line2D at 0x21318fda450>]
In [ ]:
from sklearn.metrics import mean_squared_error
from math import sqrt

rmse = sqrt(mean_squared_error(test_data['Value'], forecast_mean))
print(f'RMSE: {rmse:.2f}')
RMSE: 0.08
In [ ]:
# Assuming lsas is a DataFrame or Series
plt.figure(figsize=(14, 7))
plt.plot(lsa, color='blue', label='LSA Data')
plt.title(' Average precipitation of Indonesia Data Over Time')
plt.xlabel('Time')
plt.ylabel('LSA Value')
plt.legend()
plt.grid(True)
plt.show()
In [ ]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load the dataset
file_path = 'pci.csv'
df = pd.read_csv(file_path)

# Convert 'Date' column to datetime format and set it as the index
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Only keep the 'Value' column for modeling
df = df[['Value']]

# Split the data into training (2003-2022) and testing (2023)
train = df[:'2020-12-01']
test = df['2021-01-01':'2021-12-01']
test2 = df['2022-01-01':'2022-12-01']
test1 = df['2023-01-01':]

# Define the SARIMAX model
model = SARIMAX(train['Value'], order=(2, 0, 1), seasonal_order=(1, 1, 1, 12))

# Fit the model
model_fit = model.fit(disp=False)

# Forecast for the test period (2023)
forecast = model_fit.get_forecast(steps=len(test2))
predicted_values = forecast.predicted_mean
confidence_intervals = forecast.conf_int()

# Calculate RMSE for evaluation
rmse = np.sqrt(mean_squared_error(test2['Value'], predicted_values))
print(f'RMSE: {rmse}')

# Plot the results
plt.figure(figsize=(10, 6))
plt.plot(train.index, train['Value'], label='Train')
plt.plot(test.index, test2['Value'], label='Test', color='orange')
plt.plot(test.index, predicted_values, label='Forecast', color='green')
plt.fill_between(test.index, 
                 confidence_intervals.iloc[:, 0], 
                 confidence_intervals.iloc[:, 1], color='k', alpha=.2)

plt.title('SARIMAX Model - Forecast vs Actuals')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()

# Print the forecasted values and confidence intervals
print('Forecasted Values:')
print(predicted_values)

print('\nConfidence Intervals:')
print(confidence_intervals)
RMSE: 0.071913014053073
Forecasted Values:
2021-01-01    0.375824
2021-02-01    0.347584
2021-03-01    0.340026
2021-04-01    0.316241
2021-05-01    0.301906
2021-06-01    0.290242
2021-07-01    0.240718
2021-08-01    0.195128
2021-09-01    0.213559
2021-10-01    0.255150
2021-11-01    0.311400
2021-12-01    0.391716
Freq: MS, Name: predicted_mean, dtype: float64

Confidence Intervals:
            lower Value  upper Value
2021-01-01     0.267925     0.483723
2021-02-01     0.234356     0.460811
2021-03-01     0.219016     0.461035
2021-04-01     0.191486     0.440996
2021-05-01     0.174525     0.429288
2021-06-01     0.161224     0.419260
2021-07-01     0.110629     0.370807
2021-08-01     0.064348     0.325908
2021-09-01     0.082332     0.344785
2021-10-01     0.123638     0.386663
2021-11-01     0.179714     0.443086
2021-12-01     0.259919     0.523514
In [ ]:
#plot the data of precipitation and actual data of 2023
plt.figure(figsize=(14, 7))
plt.plot(test2, color='blue', label='Precipitation Data')
plt.plot(predicted_values, color='red', label='Actual Precipitation Data')
plt.title('Precipitation Data Over Time')
plt.xlabel('Time')
plt.ylabel('Precipitation Value')
plt.legend()
plt.grid(True)
plt.show()
In [ ]:
#predict the data of 2022
forecast = model_fit.get_forecast(steps=len(test1))
predicted_values = forecast.predicted_mean
confidence_intervals = forecast.conf_int()

# Calculate RMSE for evaluation
rmse = np.sqrt(mean_squared_error(test1['Value'], predicted_values))
print(f'RMSE: {rmse}')

# Plot the results
plt.figure(figsize=(10, 6))
plt.plot(train.index, train['Value'], label='Train')
plt.plot(test.index, test1['Value'], label='Test', color='orange')
plt.plot(test.index, predicted_values, label='Forecast', color='green')
plt.fill_between(test.index, 
                 confidence_intervals.iloc[:, 0], 
                 confidence_intervals.iloc[:, 1], color='k', alpha=.2)

plt.title('SARIMAX Model - Forecast vs Actuals')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()
RMSE: 0.0698779432145898
In [ ]:
r21 = r2_score(lsa.values,test1)
r2 = r2_score(predicted_values,test1)
print(f'R² value: {r2}')    
print(f'R² value: {r21}')
R² value: -0.3793246262297929
R² value: -0.3149617784509602
In [ ]:
data_diff['ENSO_diff']
Out[ ]:
Date
2003-02-28   -0.29
2003-03-31   -0.25
2003-04-30   -0.42
2003-05-31   -0.22
2003-06-30    0.10
              ... 
2023-08-31    0.25
2023-09-30    0.24
2023-10-31    0.22
2023-11-30    0.14
2023-12-31    0.03
Name: ENSO_diff, Length: 251, dtype: float64
In [ ]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Load the dataset
file_path = 'pci.csv'
df = pd.read_csv(file_path)

# Convert 'Date' column to datetime format and set it as the index
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Only keep the 'Value' column for modeling
df = df[['Value']]

# Create a synthetic exogenous variable (e.g., a random series that might affect 'Value')
np.random.seed(0)
df['Exogenous'] = np.random.normal(0, 1, len(df))

# Split the data into training (2003-2022) and testing (2023)
train = df[:'2020-12-01']
test = df['2021-01-01':'2021-12-01']
test2 = df['2022-01-01':'2022-12-01']
test1 = df['2023-01-01':]
print(len(data_diff))
# Prepare the exogenous variables for train and test sets
exog_train = train[['Exogenous']]
exog_test = test[['Exogenous']]

# Define the ARIMAX model
model = SARIMAX(train['Value'], exog=data_diff[['ENSO_diff']], order=(2, 0, 2),seasonal_order=(1, 0, 1, 12))

# Fit the model
model_fit = model.fit(disp=False)

# Forecast for the test period (2023) using the exogenous variables
forecast = model_fit.get_forecast(steps=len(test), exog=exog_test)
predicted_values = forecast.predicted_mean
confidence_intervals = forecast.conf_int()

# Calculate RMSE for evaluation
rmse = np.sqrt(mean_squared_error(test['Value'], predicted_values))
print(f'RMSE: {rmse}')

# Plot the results
plt.figure(figsize=(10, 6))
plt.plot(train.index, train['Value'], label='Train')
plt.plot(test.index, test['Value'], label='Test', color='orange')
plt.plot(test.index, predicted_values, label='Forecast', color='green')
plt.fill_between(test.index, 
                 confidence_intervals.iloc[:, 0], 
                 confidence_intervals.iloc[:, 1], color='k', alpha=.2)

plt.title('ARIMAX Model - Forecast vs Actuals')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()

# Print the forecasted values and confidence intervals
print('Forecasted Values:')
print(predicted_values)

print('\nConfidence Intervals:')
print(confidence_intervals)
print(train['Value'].size())
251
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[464], line 33
     30 exog_test = test[['Exogenous']]
     32 # Define the ARIMAX model
---> 33 model = SARIMAX(train['Value'], exog=data_diff[['ENSO_diff']], order=(2, 0, 2),seasonal_order=(1, 0, 1, 12))
     35 # Fit the model
     36 model_fit = model.fit(disp=False)

File c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\statespace\sarimax.py:328, in SARIMAX.__init__(self, endog, exog, order, seasonal_order, trend, measurement_error, time_varying_regression, mle_regression, simple_differencing, enforce_stationarity, enforce_invertibility, hamilton_representation, concentrate_scale, trend_offset, use_exact_diffuse, dates, freq, missing, validate_specification, **kwargs)
    318 def __init__(self, endog, exog=None, order=(1, 0, 0),
    319              seasonal_order=(0, 0, 0, 0), trend=None,
    320              measurement_error=False, time_varying_regression=False,
   (...)
    325              freq=None, missing='none', validate_specification=True,
    326              **kwargs):
--> 328     self._spec = SARIMAXSpecification(
    329         endog, exog=exog, order=order, seasonal_order=seasonal_order,
    330         trend=trend, enforce_stationarity=None, enforce_invertibility=None,
    331         concentrate_scale=concentrate_scale, dates=dates, freq=freq,
    332         missing=missing, validate_specification=validate_specification)
    333     self._params = SARIMAXParams(self._spec)
    335     # Save given orders

File c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\arima\specification.py:446, in SARIMAXSpecification.__init__(self, endog, exog, order, seasonal_order, ar_order, diff, ma_order, seasonal_ar_order, seasonal_diff, seasonal_ma_order, seasonal_periods, trend, enforce_stationarity, enforce_invertibility, concentrate_scale, trend_offset, dates, freq, missing, validate_specification)
    441         exog = np.c_[trend_data, exog]
    443 # Create an underlying time series model, to handle endog / exog,
    444 # especially validating shapes, retrieving names, and potentially
    445 # providing us with a time series index
--> 446 self._model = TimeSeriesModel(endog, exog=exog, dates=dates, freq=freq,
    447                               missing=missing)
    448 self.endog = None if faux_endog else self._model.endog
    449 self.exog = self._model.exog

File c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\tsa\base\tsa_model.py:470, in TimeSeriesModel.__init__(self, endog, exog, dates, freq, missing, **kwargs)
    467 def __init__(
    468     self, endog, exog=None, dates=None, freq=None, missing="none", **kwargs
    469 ):
--> 470     super().__init__(endog, exog, missing=missing, **kwargs)
    472     # Date handling in indexes
    473     self._init_dates(dates, freq)

File c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\base\model.py:270, in LikelihoodModel.__init__(self, endog, exog, **kwargs)
    269 def __init__(self, endog, exog=None, **kwargs):
--> 270     super().__init__(endog, exog, **kwargs)
    271     self.initialize()

File c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\base\model.py:95, in Model.__init__(self, endog, exog, **kwargs)
     93 missing = kwargs.pop('missing', 'none')
     94 hasconst = kwargs.pop('hasconst', None)
---> 95 self.data = self._handle_data(endog, exog, missing, hasconst,
     96                               **kwargs)
     97 self.k_constant = self.data.k_constant
     98 self.exog = self.data.exog

File c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\base\model.py:135, in Model._handle_data(self, endog, exog, missing, hasconst, **kwargs)
    134 def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
--> 135     data = handle_data(endog, exog, missing, hasconst, **kwargs)
    136     # kwargs arrays could have changed, easier to just attach here
    137     for key in kwargs:

File c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\base\data.py:675, in handle_data(endog, exog, missing, hasconst, **kwargs)
    672     exog = np.asarray(exog)
    674 klass = handle_data_class_factory(endog, exog)
--> 675 return klass(endog, exog=exog, missing=missing, hasconst=hasconst,
    676              **kwargs)

File c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\base\data.py:89, in ModelData.__init__(self, endog, exog, missing, hasconst, **kwargs)
     87 self.k_constant = 0
     88 self._handle_constant(hasconst)
---> 89 self._check_integrity()
     90 self._cache = {}

File c:\Users\Naveen Sabarinath\anaconda3\Lib\site-packages\statsmodels\base\data.py:533, in PandasData._check_integrity(self)
    529 # exog can be None and we could be upcasting one or the other
    530 if (exog is not None and
    531         (hasattr(endog, 'index') and hasattr(exog, 'index')) and
    532         not self.orig_endog.index.equals(self.orig_exog.index)):
--> 533     raise ValueError("The indices for endog and exog are not aligned")
    534 super(PandasData, self)._check_integrity()

ValueError: The indices for endog and exog are not aligned
In [ ]:
print(train['Value'].size())
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[465], line 1
----> 1 print(train['Value'].size())

TypeError: 'int' object is not callable
In [ ]:
#plot the data of precipitation and actual data of 2023
plt.figure(figsize=(14, 7))
plt.plot(test['Value'], color='blue', label='Precipitation Data')
plt.plot(predicted_values, color='red', label='predicted Precipitation Data')
plt.plot
plt.title('Precipitation Data Over Time')
plt.xlabel('Time')
plt.ylabel('Precipitation Value')
plt.legend()
plt.grid(True)
plt.show()
In [ ]:
r2 = r2_score(test['Value'], predicted_values)
r21 = r2_score(test['Value'], lsa.values)
print(f'R² value: {r2}')
print(f'R² value: {r21}')  
R² value: 0.13574720534345386
R² value: 0.10433376681855322
In [ ]:
k= df["Value"].diff(36)
print(k)
Date
2003-01-01         NaN
2003-02-01         NaN
2003-03-01         NaN
2003-04-01         NaN
2003-05-01         NaN
                ...   
2023-08-01   -0.076554
2023-09-01   -0.128812
2023-10-01   -0.186202
2023-11-01   -0.121039
2023-12-01   -0.121389
Name: Value, Length: 252, dtype: float64
In [ ]:
plt.plot(k.dropna())
Out[ ]:
[<matplotlib.lines.Line2D at 0x21318e6b550>]
In [ ]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import r2_score
from itertools import product
import warnings

# Load your data
# Replace 'your_pci_data.csv' and 'your_oni_data.csv' with the paths to your datasets
pci_data = pd.read_csv('pci.csv', parse_dates=['Date'])
oni_data = pd.read_csv('oni_data.csv')

# Parse the Date column in pci_data
pci_data['Date'] = pd.to_datetime(pci_data['Date'])

# Summarize the PCI data by year
pci_yearly = pci_data.groupby(pci_data['Date'].dt.year)['Value'].mean().reset_index()

# Rename columns for clarity
pci_yearly.columns = ['Year', 'Average_PCI']

# Merge the ONI and PCI data on the Year column
merged_data = pd.merge(pci_yearly, oni_data, on='Year', how='inner')

# Display the first few rows of the merged dataset
merged_data.head()
Out[ ]:
Year Average_PCI SEAS Total ANOM
0 2003 0.245826 DJF 27.51 0.92
1 2003 0.245826 JFM 27.41 0.63
2 2003 0.245826 FMA 27.58 0.38
3 2003 0.245826 MAM 27.56 -0.04
4 2003 0.245826 AMJ 27.48 -0.26
In [ ]:
# Separate plots for clarity
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 12))

# Plotting Average PCI trends
ax1.plot(pci_yearly['Year'], pci_yearly['Average_PCI'], marker='o', linestyle='-', color='b')
ax1.set_title('Yearly Average PCI Trend')
ax1.set_xlabel('Year')
ax1.set_ylabel('Average PCI')
ax1.grid(True)

# Since ONI data has multiple entries per year (seasonal), we need to handle it differently.
# We will use a line plot to connect the average anomalies of each year.
oni_yearly_avg = merged_data.groupby('Year')['ANOM'].mean().reset_index()

# Plotting ONI trends
ax2.plot(oni_yearly_avg['Year'], oni_yearly_avg['ANOM'], marker='o', linestyle='-', color='r')
ax2.set_title('Yearly Average ONI Anomaly Trend')
ax2.set_xlabel('Year')
ax2.set_ylabel('Average ONI Anomaly')
ax2.grid(True)

plt.tight_layout()
plt.show()

# Calculate the Pearson correlation coefficient between the Average_PCI and the average ONI anomaly
correlation = pci_yearly['Average_PCI'].corr(oni_yearly_avg['ANOM'])

correlation
Out[ ]:
-0.7870259995699593
In [ ]:
# Check available years in both datasets to ensure we have the required data for modeling and testing
available_years_pci = pci_yearly['Year'].unique()
available_years_oni = oni_data['Year'].unique()

available_years_pci, available_years_oni
Out[ ]:
(array([2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
        2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023]),
 array([1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960,
        1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971,
        1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982,
        1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993,
        1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
        2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
        2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024], dtype=int64))
In [ ]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Prepare the data for training and testing
train_data = merged_data[merged_data['Year'] <= 2020]
test_data = merged_data[merged_data['Year'] >= 2021]

# Features (ONI anomalies) and target (PCI values)
X_train = train_data[['ANOM']]
y_train = train_data['Average_PCI']
X_test = test_data[['ANOM']]
y_test = test_data['Average_PCI']

# Initialize and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict using the model
predictions = model.predict(X_test)

# Calculate the R² value
r2 = r2_score(y_test, predictions)
r2
Out[ ]:
0.4124657629085594